library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)
0 1 442 446
dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL
trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]
pander::pander(table(dataColonTrain$status))
| 0 | 1 |
|---|---|
| 309 | 312 |
pander::pander(table(dataColonTest$status))
| 0 | 1 |
|---|---|
| 133 | 134 |
ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)
[++-++++-+++-++-+++++-+++++-++++++]..
sm <- summary(ml)
pander::pander(sm$coefficients)
| Estimate | lower | HR | upper | u.Accuracy | |
|---|---|---|---|---|---|
| node4 | 2.96e-01 | 1.206 | 1.345 | 1.500 | 0.605 |
| age_nodes | -1.31e-05 | 1.000 | 1.000 | 1.000 | 0.610 |
| age_node4 | 3.37e-03 | 1.002 | 1.003 | 1.005 | 0.605 |
| nodes | 3.83e-02 | 1.017 | 1.039 | 1.062 | 0.609 |
| rxLev_5FU_age | -2.51e-03 | 0.996 | 0.997 | 0.999 | 0.557 |
| rxLev_5FU | -1.20e-01 | 0.822 | 0.887 | 0.957 | 0.557 |
| adhere | 1.39e-01 | 1.042 | 1.149 | 1.268 | 0.539 |
| age | -1.16e-03 | 0.998 | 0.999 | 1.000 | 0.519 |
| age_adhere | 1.14e-03 | 1.000 | 1.001 | 1.002 | 0.539 |
| extent | 9.35e-02 | 1.022 | 1.098 | 1.179 | 0.541 |
| r.Accuracy | full.Accuracy | u.AUC | r.AUC | full.AUC | |
|---|---|---|---|---|---|
| node4 | 0.579 | 0.618 | 0.607 | 0.578 | 0.619 |
| age_nodes | 0.557 | 0.616 | 0.611 | 0.557 | 0.617 |
| age_node4 | 0.598 | 0.611 | 0.607 | 0.598 | 0.612 |
| nodes | 0.586 | 0.618 | 0.609 | 0.586 | 0.618 |
| rxLev_5FU_age | 0.627 | 0.618 | 0.556 | 0.628 | 0.619 |
| rxLev_5FU | 0.611 | 0.611 | 0.556 | 0.612 | 0.612 |
| adhere | 0.608 | 0.620 | 0.541 | 0.609 | 0.620 |
| age | 0.615 | 0.618 | 0.519 | 0.616 | 0.619 |
| age_adhere | 0.617 | 0.621 | 0.541 | 0.618 | 0.621 |
| extent | 0.604 | 0.606 | 0.539 | 0.605 | 0.607 |
| IDI | NRI | z.IDI | z.NRI | Delta.AUC | Frequency | |
|---|---|---|---|---|---|---|
| node4 | 0.03866 | 0.414 | 5.75 | 6.08 | 4.12e-02 | 1.0 |
| age_nodes | 0.02188 | 0.387 | 4.94 | 5.20 | 6.01e-02 | 1.0 |
| age_node4 | 0.02538 | 0.390 | 4.54 | 5.76 | 1.39e-02 | 1.0 |
| nodes | 0.01669 | 0.274 | 3.94 | 3.61 | 3.27e-02 | 1.0 |
| rxLev_5FU_age | 0.01262 | 0.225 | 3.25 | 3.00 | -8.76e-03 | 1.0 |
| rxLev_5FU | 0.01125 | 0.225 | 3.10 | 3.00 | -4.97e-05 | 1.0 |
| adhere | 0.00732 | 0.169 | 2.79 | 3.01 | 1.15e-02 | 1.0 |
| age | 0.00841 | 0.146 | 2.77 | 1.82 | 2.83e-03 | 0.2 |
| age_adhere | 0.00650 | 0.212 | 2.66 | 3.52 | 3.46e-03 | 0.6 |
| extent | 0.00897 | 0.150 | 2.56 | 2.70 | 2.48e-03 | 0.8 |
Here we evaluate the model using the RRPlot() function.
Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years
index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)
h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))
rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Raw Train: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
| @:0.9 | @MAX_BACC | @MAX_RR | @SPE100 | |
|---|---|---|---|---|
| Thr | 0.497 | 0.332 | 0.253 | 0.22072 |
| RR | 1.622 | 1.833 | 2.277 | 1.00000 |
| SEN | 0.276 | 0.631 | 0.955 | 1.00000 |
| SPE | 0.896 | 0.667 | 0.149 | 0.00324 |
| BACC | 0.586 | 0.649 | 0.552 | 0.50162 |
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
| est | lower | upper |
|---|---|---|
| 0.995 | 0.888 | 1.11 |
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.51 | 1.51 | 1.48 | 1.54 |
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.36 | 1.36 | 1.35 | 1.36 |
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.649 | 0.649 | 0.62 | 0.678 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.685 | 0.643 | 0.726 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.276 | 0.227 | 0.329 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.9 | 0.861 | 0.931 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 |
|---|---|---|---|
| 0.497 | 0.332 | 0.253 | 0.221 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.65 | 1.43 | 1.91 |
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 504 | 226 | 270 | 7.17 | 53.7 |
| class=1 | 117 | 86 | 42 | 46.07 | 53.7 |
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
| h0 | Gain | DeltaTime |
|---|---|---|
| 0.685 | 1.52 | 3.04 |
h0 <- calprob$h0
timeinterval <- calprob$timeInterval;
rdata <- cbind(dataColonTrain$status,calprob$prob)
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Calibrated Train: Colon",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
| @:0.9 | @MAX_BACC | @MAX_RR | @SPE100 | |
|---|---|---|---|---|
| Thr | 0.649 | 0.459 | 0.359 | 0.31610 |
| RR | 1.622 | 1.833 | 2.277 | 1.00000 |
| SEN | 0.276 | 0.631 | 0.955 | 1.00000 |
| SPE | 0.896 | 0.667 | 0.149 | 0.00324 |
| BACC | 0.586 | 0.649 | 0.552 | 0.50162 |
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
| est | lower | upper |
|---|---|---|
| 0.763 | 0.681 | 0.853 |
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 0.964 | 0.964 | 0.947 | 0.979 |
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.04 | 1.04 | 1.04 | 1.04 |
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.649 | 0.649 | 0.619 | 0.68 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.685 | 0.643 | 0.726 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.276 | 0.227 | 0.329 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.9 | 0.861 | 0.931 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 |
|---|---|---|---|
| 0.649 | 0.459 | 0.359 | 0.316 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.65 | 1.43 | 1.91 |
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 504 | 226 | 270 | 7.17 | 53.7 |
| class=1 | 117 | 86 | 42 | 46.07 | 53.7 |
The calibrated h0 and timeinterval were estimated on the training set
index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))
rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=dataColonTest$time,
title="Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
| @:0.648769528990332 | @:0.459009260615184 | @:0.359085607123185 | |
|---|---|---|---|
| Thr | 0.644 | 0.458 | 0.359 |
| RR | 1.687 | 1.618 | 10.769 |
| SEN | 0.328 | 0.627 | 0.993 |
| SPE | 0.880 | 0.609 | 0.143 |
| BACC | 0.604 | 0.618 | 0.568 |
| @:0.316100113104116 | @MAX_BACC | @MAX_RR | @SPE100 | |
|---|---|---|---|---|
| Thr | 0.3515 | 0.435 | 0.362 | 0.3515 |
| RR | 63.0588 | 2.257 | 13.136 | 63.0588 |
| SEN | 1.0000 | 0.836 | 0.993 | 1.0000 |
| SPE | 0.0902 | 0.451 | 0.173 | 0.0902 |
| BACC | 0.5451 | 0.643 | 0.583 | 0.5451 |
pander::pander(t(rrAnalysisTest$OERatio),caption="O/E Ratio")
| est | lower | upper |
|---|---|---|
| 0.812 | 0.681 | 0.962 |
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.03 | 1.03 | 1.01 | 1.05 |
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.04 | 1.04 | 1.03 | 1.04 |
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.671 | 0.672 | 0.631 | 0.719 |
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.695 | 0.632 | 0.757 |
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.328 | 0.25 | 0.415 |
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.887 | 0.821 | 0.935 |
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 | at_max_BACC | at_max_RR | atSPE100 |
|---|---|---|---|---|---|---|
| 0.649 | 0.459 | 0.359 | 0.316 | 0.435 | 0.362 | 0.352 |
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.69 | 1.36 | 2.1 |
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 132 | 51 | 75.3 | 7.84873 | 18.04578 |
| class=1 | 76 | 39 | 38.7 | 0.00177 | 0.00249 |
| class=2 | 59 | 44 | 19.9 | 28.99644 | 34.44194 |
Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process
rcv <- randomCV(theData=dataColonTrain,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.75,
repetitions=50,
classSamplingType = "Pro",
testingSet=dataColonTest
)
.[++++++-].[++-].[++++].[++++-].[++++-].[+++++].[++-].[+++-].[+++].[+++]10 Tested: 856 Avg. Selected: 8 Min Tests: 1 Max Tests: 10 Mean Tests: 4.941589 . MAD: 0.4764983
.[++++].[++++-].[++++-].[++++-].[++].[+++++].[+++-].[++++].[++++-].[++]20 Tested: 887 Avg. Selected: 8.3 Min Tests: 1 Max Tests: 20 Mean Tests: 9.537768 . MAD: 0.4765619
.[++++-].[++++].[++++-].[+++].[++++-].[+++-].[+++++++].[+++].[++++-].[+++-]30 Tested: 888 Avg. Selected: 8.2 Min Tests: 1 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4768923
.[++++].[++++-].[++-].[++-].[++++].[++++-].[+++].[+++-].[+++++++].[+++++]40 Tested: 888 Avg. Selected: 8.05 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.476948
.[+++].[++-].[++++].[+++].[++++-].[++++-].[+++].[++++-].[++++].[+++]50 Tested: 888 Avg. Selected: 8.1 Min Tests: 4 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4763361
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=times,
title="CV Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
| @:0.648769528990332 | @:0.459009260615184 | @:0.359085607123185 | |
|---|---|---|---|
| Thr | 0.649 | 0.459 | 0.35811 |
| RR | 1.599 | 1.727 | 1.51020 |
| SEN | 0.300 | 0.742 | 0.99552 |
| SPE | 0.878 | 0.493 | 0.00905 |
| BACC | 0.589 | 0.618 | 0.50228 |
| @:0.316100113104116 | @MAX_BACC | @MAX_RR | @SPE100 | |
|---|---|---|---|---|
| Thr | 0.35354 | 0.463 | 0.418 | 0.35354 |
| RR | 1.00000 | 1.767 | 2.204 | 1.00000 |
| SEN | 1.00000 | 0.731 | 0.919 | 1.00000 |
| SPE | 0.00452 | 0.520 | 0.244 | 0.00452 |
| BACC | 0.50226 | 0.626 | 0.582 | 0.50226 |
pander::pander(t(rrAnalysisCVTest$OERatio),caption="O/E Ratio")
| est | lower | upper |
|---|---|---|
| 0.756 | 0.687 | 0.829 |
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 0.93 | 0.929 | 0.917 | 0.943 |
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Ratio")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 0.999 | 0.999 | 0.997 | 1 |
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.646 | 0.646 | 0.623 | 0.671 |
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.672 | 0.637 | 0.707 |
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.3 | 0.258 | 0.345 |
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.878 | 0.844 | 0.907 |
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
| 90% | at_max_BACC | at_max_RR | atSPE100 | at_max_BACC | at_max_RR | atSPE100 |
|---|---|---|---|---|---|---|
| 0.649 | 0.459 | 0.359 | 0.316 | 0.463 | 0.418 | 0.354 |
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.6 | 1.42 | 1.81 |
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 334 | 115 | 196.8 | 33.97 | 61.2 |
| class=1 | 366 | 197 | 180.6 | 1.49 | 2.5 |
| class=2 | 188 | 134 | 68.6 | 62.26 | 74.1 |